#Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

#Loading the dataset
df = pd.read_csv('Launch SFR.csv')
df.head()

#Checking the shape of the dataset
df.shape

(183, 12)

#Checking the datatypes of the columns
df.dtypes

Company              object
SFR                   int64
Payload (kg)         object
Launch Cost ($M)    float64
Price ($/kg)         object
Funding ($M)         object
Launch Class         object
Orbit Altitude       object
Tech Type            object
Country              object
HQ Location          object
Description          object
dtype: object

#type casting
df['Payload (kg)'] = df['Payload (kg)'].astype(str).str.replace(',', '').astype(float)
df['Launch Cost ($M)'] = df['Launch Cost ($M)'].astype(str).str.replace(',', '').astype(float)
df['Price ($/kg)'] = df['Price ($/kg)'].astype(str).str.replace(',', '').astype(float)

#Checking '-' (null values) values in the dataset
df.isin(['-']).sum()

Company               0
SFR                   0
Payload (kg)          0
Launch Cost ($M)      0
Price ($/kg)          0
Funding ($M)        110
Launch Class         16
Orbit Altitude        3
Tech Type             0
Country               0
HQ Location           8
Description           0
dtype: int64

#droping the column
df.drop(['Funding ($M)'], axis=1, inplace=True)

#replacing the '-' values with mode
df['Launch Class'].replace('-', df['Launch Class'].mode()[0], inplace=True)
df['Orbit Altitude'].replace('-', df['Orbit Altitude'].mode()[0], inplace=True)

df.drop(['HQ Location'], axis=1, inplace=True)

#Checking for unique values in the dataset
df.nunique()

Company             183
SFR                  10
Payload (kg)         65
Launch Cost ($M)     46
Price ($/kg)         53
Launch Class         10
Orbit Altitude        4
Tech Type             8
Country              31
Description         183
dtype: int64

cols = ['Launch Class', 'Orbit Altitude', 'Tech Type']
for i in cols:
    print(i, df[i].unique(), '\n')

Launch Class ['Medium, Heavy' 'Small' 'Tourism, Small, Heavy' 'Small, Medium'
 'Heavy, Super Heavy' 'Tourism' 'Medium' 'Small, Heavy' 'Tourism, Medium'
 'Heavy'] 

Orbit Altitude ['LEO' 'Suborbital' 'GTO' 'Lunar'] 

Tech Type ['Rocket' 'Balloon' 'Balloon, Rocket' 'Spaceplane' 'Plane, Rocket' 'Other'
 'Rocket, Other' 'Rocket, Spaceplane']

def l_class(launch):
    if launch in ['Medium, Heavy','Medium']:
        return 'Medium'
    elif launch in ['Small', 'Small, Medium', 'Small, Heavy']:
        return 'Small'
    elif launch in ['Heavy', 'Heavy, Super Heavy']:
        return 'Heavy'
    elif launch in ['Tourism', 'Tourism, Small, Heavy','Tourism, Medium']:
        return 'Tourism'
df['Launch Class'] = df['Launch Class'].apply(l_class)

df['Launch Class'].value_counts()

Launch Class
Small      152
Medium      15
Tourism     10
Heavy        6
Name: count, dtype: int64

def tech_type(tech):
    if tech in ['Rocket', 'Plane, Rocket', 'Rocket, Other', 'Rocket, Spaceplane']:
        return 'Rocket'
    elif tech in ['Ballon', 'Balloon, Rocket']:
        return 'Balloon'
    elif tech in ['Spaceplane']:
        return 'Spaceplane'
    else:
        return 'Other'
df['Tech Type'] = df['Tech Type'].apply(tech_type)

df['Tech Type'].value_counts()

Tech Type
Rocket        133
Other          23
Spaceplane     20
Balloon         7
Name: count, dtype: int64

def description(description):
    if description in [
    'Developing the Vega & Ariane launch vehicles',
    'Developing the Ceres-1 and Pallas-1 launch vehicles',
    'Developing the Firefly Alpha launch vehicle; highest payload performance with the lowest cost per kg to orbit in its vehicle class',
    'Developing suborbital rockets to provide access and research for traditional and ‘New-Space’ markets',
    'The first rocket company and launch site for cubesat payloads in New Zealand',
    'Developing the ERIS launch vehicles to provide reliable and cost-effective access to space',
    'Buildig the Xogdor rocket to test payloads at supersonic speeds and at the edge of space',
    'Building a private 3-stage nanosatellite launch vehicle in China',
    'Developing the LAROS-RC2 orbital carrier and accompanying mobile launch infrastructure',
    'Developing the Trans-Atmospheric Flight Vehicle (TAV 1)',
    'Developing suborbital and orbital launch vehicles',
    'Developing a series of Launch Vehicles based on high-altitude air launch',
    'Building a hypersonic space plane that can takeoff from anywhere in any weather',
    'Developing a unique launch vehicle and propulsion system',
    'Building a space launch system for sending hardened satellites and bulk cargo into space',
    'Developing a reactive, reliable and cost-efficient nano-launcher',
    'Developing a next generation of reusable launch vehicles for microgravity research',
    'Building a reusable three-person rocket ship for space tourism',
    'Developing Infinity, a small reusable rocket',
    'Developing high-performance, low carbon micro launch vehicles']:
        return 'Launch Vehicle Development'
    elif description in [
    'Providing routine launch access to Earth orbit for entrepreneurs and enterprises',
    'Launch vehicle manufacturer and launch services provider',
    'Commercial launch vehicle manufacturer and space launch provider in China',
    'Launch services for small, micro and nano satellites',
    'Providing launch services to LEO at an affordable cost',
    'Enabling Low cost access to space with the Aerospike engined reusable Small satellite launch vehicle "MESO"',
    'Providing passengers with a trip into the stratosphere',
    'Provides earth-to-space space delivery services for small payloads',
    'Dedicated nanosatellite launch provider',
    'Rapid response small satellite launch vehicles for government and commercial customers',
    'Provding dedicated launch services for cube and nanosatellites',
    'Integrated launch services for the Zenit Launch Vehicle via a mobile sea platform',
    'Cost-effective small satellite launch services from the United Kingdom',
    'SpaceRyde offers affordable, on-schedule, dedicated launch for small sats',
    'Enabling transportation to LEO',
    'Reusable hybrid rocketry',
    'Developing a reliable tow-glider launch system',
    'Affordable and reliable small satellite launch system for LEO, SSO, and GEO missions',
    'Customized launch services for sub-orbital and orbital payloads',
    'SpaceBox is a suborbital launch and recovery platform designed to enable affordable access to space for educational, professional, consumer and hobbyist payloads']:
        return 'Launch Services'
    elif description in [
    'Offers a range of sounding rockets, capable of flights up to 300km in multistage configurations',
    'Balloons that lift anything from a few kilograms to several tons and are able to operate at an altitude as low as a few hundred meters or as high as 40 km',
    'Launching stratospheric balloons for research and promotional purposes',
    'Balloon-based small satellite launcher',
    'Building a ballooning platform to offer novel access to the mesosphere']:
        return 'Balloon-Based Technologies'
    elif description in [
    'Building rockets to launch small satellites',
    'Developing a zero-emission space tourism platform',
    'Personalized engineering support and dedicated airborne orbital launch platform',
    'Revolutionizing near space tourism and opening it to a greater audience']:
        return 'Space Tourism Suborbital'
    elif description in [
    'Designs, manufactures, and operates launch vehicles, propulsion systems, and satellites and related components',
    'Developing Dream Chaser, a multi-mission space utility vehicle designed to transport crew and cargo to low-Earth orbit',
    'A rocket, satellite, and spacecraft manufacturing company.',
    'Mass production of on-demand launchers for small sats',
    'Designing a single stage to orbit hypersonic vehicle of revolutionary design and propulsion capability',
    'Building a single-stage to orbit launch system dedicated to small payloads']:
        return 'Satellite Technology and Services'
    elif description in  [
    'Developing a unique line of rockets powered by bio-derived fuels to launch tiny satellites into space',
    'Using clean tech to develop a sustainable and cheap rocket called Haribon SLS',
    'Developing hybrid small satellite launch vehicles',
    'Privately developing rocket engines and suborbital launch vehicles in Japan',
    'Creating a reusable suborbital space complex for tourist flights into space',
    'Using RAM-accelerators to change the economics of space launch',
    'Developing a range of sustainable, reusable launchers dedicated to the launch of small satellites',
    'Developing electromagnetic launch systems to change how we launch payloads into space',
    'Redesigning launch from the ground up',
    'Developing SOL ASPIRET, a suborbital spaceplane',
    'Developing PROTEUS, an innovative hybrid and autonomous launcher for small sats']:
        return 'Innovative Propulsion Technologies'
    else:
        return 'Space Access and Technology Innovation'
df['Description'] = df['Description'].apply(description)

df['Description'].value_counts()

Description
Space Access and Technology Innovation    123
Launch Vehicle Development                 19
Launch Services                            18
Innovative Propulsion Technologies         10
Satellite Technology and Services           6
Space Tourism Suborbital                    4
Balloon-Based Technologies                  3
Name: count, dtype: int64

df.describe()

df['Payload (kg)'] = df['Payload (kg)'].replace(0, df['Payload (kg)'].mean())
df['Launch Cost ($M)'] = df['Launch Cost ($M)'].replace(0, df['Launch Cost ($M)'].mean())
df['SFR'] = df['SFR'].replace(0, df['SFR'].median())

df.drop(columns = 'Price ($/kg)', axis=1, inplace=True)

df.head()

sns.countplot(x='Country', data=df, order=df['Country'].value_counts().index).set_title('Launches by Country')
plt.xticks(rotation=90)

sns.countplot(x = 'Tech Type', data = df, order = df['Tech Type'].value_counts().index).set_title('Launches by Tech Type')

Text(0.5, 1.0, 'Launches by Tech Type')

sns.countplot(x = 'Launch Class', data = df, order = df['Launch Class'].value_counts().index).set_title('Launches by Launch Class')

Text(0.5, 1.0, 'Launches by Launch Class')

sns.countplot(x = 'Orbit Altitude', data = df, order = df['Orbit Altitude'].value_counts().index).set_title('Launches by Orbit Altitude')

Text(0.5, 1.0, 'Launches by Orbit Altitude')

sns.countplot(x = 'Description', data = df, order = df['Description'].value_counts().index).set_title('Launches by Description')
plt.xticks(rotation=90)

([0, 1, 2, 3, 4, 5, 6],
 [Text(0, 0, 'Space Access and Technology Innovation'),
  Text(1, 0, 'Launch Vehicle Development'),
  Text(2, 0, 'Launch Services'),
  Text(3, 0, 'Innovative Propulsion Technologies'),
  Text(4, 0, 'Satellite Technology and Services'),
  Text(5, 0, 'Space Tourism Suborbital'),
  Text(6, 0, 'Balloon-Based Technologies')])

sns.histplot(x = 'Payload (kg)', data = df, bins = 50).set_title('Payload Distribution')

Text(0.5, 1.0, 'Payload Distribution')

sns.histplot(x = 'Launch Cost ($M)', data = df, bins = 50).set_title('Launch Cost Distribution')

Text(0.5, 1.0, 'Launch Cost Distribution')

sns.histplot(x = 'SFR', data = df, bins = 9).set_title('SFR Distribution')

Text(0.5, 1.0, 'SFR Distribution')

sns.barplot(y = 'Country', x = 'SFR', data = df[df['SFR'] > 6], estimator = len, order = df[df['SFR'] > 6]['Country'].value_counts().index).set_title('Number of Launches by Country having SFR > 6')
plt.xlabel('Number of Launches')

Text(0.5, 0, 'Number of Launches')

sns.countplot(x = 'Tech Type', data = df, hue= 'SFR', palette= 'Set1').set_title('SFR and Tech Type')

Text(0.5, 1.0, 'SFR and Tech Type')

sns.countplot(x = 'Launch Class', data = df, hue= 'SFR', palette= 'Set1').set_title('SFR and Launch Class')

Text(0.5, 1.0, 'SFR and Launch Class')

sns.countplot(x = 'Orbit Altitude', data = df, hue= 'SFR', palette= 'Set1').set_title('SFR and Orbit Altitude')

Text(0.5, 1.0, 'SFR and Orbit Altitude')

sns.countplot(x = 'Description', data = df, hue= 'SFR', palette= 'Set1').set_title('SFR and Description')
plt.xticks(rotation=90)

([0, 1, 2, 3, 4, 5, 6],
 [Text(0, 0, 'Launch Vehicle Development'),
  Text(1, 0, 'Launch Services'),
  Text(2, 0, 'Balloon-Based Technologies'),
  Text(3, 0, 'Space Access and Technology Innovation'),
  Text(4, 0, 'Satellite Technology and Services'),
  Text(5, 0, 'Space Tourism Suborbital'),
  Text(6, 0, 'Innovative Propulsion Technologies')])

fig, ax = plt.subplots(1,2,figsize=(15, 8))
sns.boxplot(x = 'SFR', y = 'Payload (kg)', data = df, ax = ax[0], palette = 'Set1').set_title('SFR and Payload')
sns.violinplot(x = 'SFR', y = 'Payload (kg)', data = df, ax = ax[1], palette = 'Set1').set_title('SFR and Payload')

Text(0.5, 1.0, 'SFR and Payload')

fig, ax = plt.subplots(1,2,figsize=(15, 8))
sns.boxplot(x = 'SFR', y = 'Launch Cost ($M)', data = df, ax = ax[0], palette = 'Set1').set_title('SFR and Launch Cost')
sns.violinplot(x = 'SFR', y = 'Launch Cost ($M)', data = df, ax = ax[1], palette = 'Set1').set_title('SFR and Launch Cost')

Text(0.5, 1.0, 'SFR and Launch Cost')

#dropping column country and company name because the SFR is dependent upon the mission specific parameters
df.drop(columns = ['Country', 'Company'], axis=1, inplace=True)

#Using Z score to remove outliers
cols = ['Payload (kg)', 'Launch Cost ($M)']
from scipy import stats
z = np.abs(stats.zscore(df[cols]))
df = df[(z < 3).all(axis=1)]

from sklearn.preprocessing import LabelEncoder

#Label Encoding Object
le = LabelEncoder()

#object type columns
obj_cols = ['Launch Class', 'Orbit Altitude', 'Tech Type', 'Description']

#Label Encoding
for i in obj_cols:
    le.fit(df[i])
    df[i] = le.transform(df[i])
    print(i, df[i].unique(), '\n')

Launch Class [2 3 1 0] 

Orbit Altitude [1 3 0 2] 

Tech Type [2 1 0 3] 

Description [2 0 5 3 4 6 1]

df['SFR'] = df['SFR'].apply(lambda x: 1 if x > 6 else 0)

plt.figure(figsize=(8,8))
sns.heatmap(df.corr(), annot=True)

<Axes: >

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns = 'SFR', axis=1), df['SFR'], test_size=0.3, random_state=0)

from sklearn.ensemble import RandomForestClassifier
#Random Forest Classifier Object
rfc = RandomForestClassifier()

from sklearn.model_selection import GridSearchCV

#parameters for GridSearchCV
para_grid = {
    'min_samples_split': [2,4,6,8],
    'max_depth': [2,4,6,8],
    'min_samples_leaf': [2,4,6,8],
    'random_state': [0,42]
}

#GridSearchCV Object
grid = GridSearchCV(estimator=rfc, param_grid=para_grid, cv=5, verbose=2, n_jobs=-1)

#Fitting the model
grid.fit(X_train, y_train)

#Best parameters
print(grid.best_params_)

Fitting 5 folds for each of 128 candidates, totalling 640 fits
{'max_depth': 2, 'min_samples_leaf': 2, 'min_samples_split': 2, 'random_state': 0}

#model with best parameters
rfc = RandomForestClassifier(max_depth=4, min_samples_leaf=2, min_samples_split=2, random_state=0)

#Fitting the model
rfc.fit(X_train, y_train)

#training accuracy
print('Training Accuracy: ', rfc.score(X_train, y_train))

#prediction
r_pred = rfc.predict(X_test)

Training Accuracy:  0.8852459016393442

from sklearn.tree import DecisionTreeClassifier

#Decision Tree Classifier Object
dtc = DecisionTreeClassifier()

from sklearn.model_selection import GridSearchCV

#parameters for GridSearchCV
para_grid = {
    'min_samples_split': [2,4,6,8],
    'max_depth': [2,4,6,8],
    'min_samples_leaf': [2,4,6,8],
    'random_state': [0,42]
}

#GridSearchCV Object
grid = GridSearchCV(estimator=dtc, param_grid=para_grid, cv=5, verbose=2, n_jobs=-1)

#Fitting the model
grid.fit(X_train, y_train)

#Best parameters
print(grid.best_params_)

Fitting 5 folds for each of 128 candidates, totalling 640 fits
{'max_depth': 4, 'min_samples_leaf': 4, 'min_samples_split': 2, 'random_state': 0}

#model with best parameters
dtc = DecisionTreeClassifier(max_depth=2, min_samples_leaf=6, min_samples_split=2, random_state=0)

#Fitting the model
dtc.fit(X_train, y_train)

#training accuracy
print('Training Accuracy: ', dtc.score(X_train, y_train))

#prediction
d_pred = dtc.predict(X_test)

Training Accuracy:  0.8852459016393442

from sklearn.metrics import confusion_matrix
fig, ax = plt.subplots(1,2,figsize=(15, 5))
sns.heatmap(confusion_matrix(y_test, r_pred), annot=True, ax=ax[0], fmt='g').set_title('Random Forest Classifier')
sns.heatmap(confusion_matrix(y_test, d_pred), annot=True, ax=ax[1], fmt='g').set_title('Decision Tree Classifier')

Text(0.5, 1.0, 'Decision Tree Classifier')

fig, ax = plt.subplots(1,2,figsize=(15, 5))
sns.distplot(y_test, ax=ax[0], color = 'r').set_title('Random Forest Classifier')
sns.distplot(r_pred, ax=ax[0], color = 'b')
sns.distplot(y_test, ax=ax[1], color = 'r').set_title('Decision Tree Classifier')
sns.distplot(d_pred, ax=ax[1], color = 'b')

<Axes: title={'center': 'Decision Tree Classifier'}, xlabel='SFR', ylabel='Density'>

from sklearn.metrics import classification_report

print('Random Forest Classifier\n', classification_report(y_test, r_pred),'\n')
print('Decision Tree Classifier\n', classification_report(y_test, d_pred))

Random Forest Classifier
               precision    recall  f1-score   support

           0       0.87      1.00      0.93        46
           1       0.00      0.00      0.00         7

    accuracy                           0.87        53
   macro avg       0.43      0.50      0.46        53
weighted avg       0.75      0.87      0.81        53
 

Decision Tree Classifier
               precision    recall  f1-score   support

           0       0.87      1.00      0.93        46
           1       0.00      0.00      0.00         7

    accuracy                           0.87        53
   macro avg       0.43      0.50      0.46        53
weighted avg       0.75      0.87      0.81        53

Column Name	Description
Company	Name of the company
SFR	SpaceFund Realty rating of the company
Payload(kg)	Payload of the mission
Launch Cost(million USD)	Launch cost of the mission
Price per kg	Price per kg payload of the mission
Launch Class	Launch class of the mission
Orbit Altitude	Orbit altitude of the mission
Tech Type	Technology type of the mission
Country	Country of the company
HQ Location	Headquarters location of the company
Description	Description of the mission

	SFR	Payload (kg)	Launch Cost ($M)	Price ($/kg)
count	183.000000	183.000000	183.000000	183.000000
mean	3.726776	2579.677596	4.840956	6587.256831
std	2.527148	8834.385310	19.132872	12755.241486
min	0.000000	0.000000	0.000000	0.000000
25%	2.000000	16.000000	0.000000	0.000000
50%	3.000000	186.000000	0.000000	0.000000
75%	5.000000	746.500000	1.500000	9250.000000
max	9.000000	63800.000000	170.000000	100000.000000

Space Fund Realty (SFR) Analysis¶

Data Dictionary¶

Data Preprocessing¶

Grouping the companies by their description¶

Exploratory Data Analysis¶

Country of Origin¶

Tech Type¶

Launch Class¶

Orbit Altitude¶

Company Description¶

Payload Distribution¶

Launch Cost Distribution¶

SFR Distribution¶

Till now, I have visualized the distribution of the data and got a better understanding of the data. Now, I will be looking at the relationship between the SFR rating and the independent variables.¶

Top 10 countries with SFR greater than 6¶

SFR and Tech Type¶

SFR and Launch Class¶

SFR and Orbit Altitude¶

SFR and Description¶

SFR and Payload¶

SFR and Launch Cost¶

Data Preprocessing Part 2¶

Outlier removal¶

Label Encoding the object type columns¶

Coorelation Matrix Heatmap¶

Train Test Split¶

Model Building¶

Random Forest Classifier¶

Hyperparameter Tuning with GridSearchCV¶

Decision Tree Classifier¶

Hyperparameter Tuning with GridSearchCV¶

Model Evaluation¶

Confusion Matrix¶

Distribution Plot¶

Classification Report¶

Conclusion¶

	Company	SFR	Payload (kg)	Launch Cost ($M)	Price ($/kg)	Funding ($M)	Launch Class	Orbit Altitude	Tech Type	Country	HQ Location	Description
0	Arianespace/Avio	9	20,000	170.00	8,500	Public	Medium, Heavy	LEO	Rocket	Italy	Colleferro	Developing the Vega & Ariane launch vehicles
1	Astra Space	9	300	3.95	13,167	Public	Small	LEO	Rocket	United States	Alameda, CA	Providing routine launch access to Earth orbit...
2	Black Sky Aerospace	9	350	0.60	1,714	-	Small	Suborbital	Rocket	Australia	Browns Plains, Queensland	Offers a range of sounding rockets, capable of...
3	Blue Origin	9	0	0.00	0	-	Tourism, Small, Heavy	Suborbital	Rocket	United States	Kent, WA	Lowering the cost of access to space with reus...
4	CNIM Air Space	9	2,700	0.00	0	Public	Small, Medium	Suborbital	Balloon	France	Ayguesvives	Balloons that lift anything from a few kilogra...

	Company	SFR	Payload (kg)	Launch Cost ($M)	Launch Class	Orbit Altitude	Tech Type	Country	Description
0	Arianespace/Avio	9	20000.000000	170.000000	Medium	LEO	Rocket	Italy	Launch Vehicle Development
1	Astra Space	9	300.000000	3.950000	Small	LEO	Rocket	United States	Launch Services
2	Black Sky Aerospace	9	350.000000	0.600000	Small	Suborbital	Rocket	Australia	Balloon-Based Technologies
3	Blue Origin	9	2579.677596	4.840956	Tourism	Suborbital	Rocket	United States	Space Access and Technology Innovation
4	CNIM Air Space	9	2700.000000	4.840956	Small	Suborbital	Other	France	Space Access and Technology Innovation